In [1]:
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#%load_ext autoreload
#%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
This notebook matches Current Population Survey (CPS) data from IPUMS (https://cps.ipums.org/cps/) with the social-class information described at https://en.wikipedia.org/wiki/Household_income_in_the_United_States.
In [4]:
# Show the kernel's current working directory.
# Relies on IPython automagic: the bare `pwd` is resolved as the `%pwd` magic.
pwd
Out[4]:
In [3]:
import os

# Directory that holds the CPS extract. Set the CPS_DATA_DIR environment
# variable to point the notebook at another location; the fallback is the
# path this notebook was originally run from, so existing behaviour is kept.
DATA_DIR = os.environ.get('CPS_DATA_DIR', r'C:\Users\gary\Documents\data')

# Later cells open the CSV with a relative path, so make DATA_DIR the
# working directory, then display it to confirm where we ended up.
os.chdir(DATA_DIR)
os.getcwd()
In [5]:
# List the contents of the current working directory.
# Relies on IPython automagic: the bare `ls` is resolved as the `%ls` alias.
ls
In [6]:
# Relative path of the CSV extract; it is resolved against the current
# working directory (presumably the IPUMS CPS extract named in the intro).
filename = 'cps_00001.csv/cps_00001.csv'

# Load the whole extract into the `resp` DataFrame used by the later cells.
resp = pd.read_csv(filepath_or_buffer=filename)

# Display summary statistics for the numeric columns.
resp.describe()
Out[6]:
In [7]:
# Print the column names, dtypes, non-null counts and memory usage of `resp`.
resp.info()
In [8]:
# Display the column labels of `resp` as a plain Python list.
list(resp.columns)
Out[8]:
In [9]:
# Preview the first five rows of the loaded data.
resp.head(n=5)
Out[9]:
In [33]:
# Share of rows to keep. Despite the name this is a fraction: 0.01 == 1%.
percent = 0.01

# Number of rows that share corresponds to; still a float at this point.
n = len(resp) * percent

# Display the whole-row count (int() truncates toward zero).
int(n)
Out[33]:
In [35]:
# Keep the int(n) rows with the largest HHINCOME values.
top01 = resp.nlargest(n=int(n), columns='HHINCOME')

# Display the income range covered by that slice as (lowest, highest).
(top01['HHINCOME'].min(), top01['HHINCOME'].max())
Out[35]:
In [11]:
# Percentiles of household income to report. Presumably these are the
# cumulative social-class boundaries taken from the Wikipedia table mentioned
# in the intro -- TODO confirm against that source.
CLASS_BOUNDARY_PERCENTILES = [12, 24, 54, 84, 99]

# One vectorized call replaces five copy-pasted cells. nanpercentile skips
# missing HHINCOME values; np.percentile would return nan for every cut point
# if even a single row were missing.
# NOTE(review): IPUMS extracts may encode missing / not-in-universe income with
# a large sentinel value rather than a blank -- verify against the codebook and
# mask such rows before trusting the upper percentiles.
pd.Series(
    np.nanpercentile(resp.HHINCOME, CLASS_BOUNDARY_PERCENTILES),
    index=pd.Index(CLASS_BOUNDARY_PERCENTILES, name='percentile'),
    name='HHINCOME',
)
Out[18]:
In [ ]: